In [1]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import os
import sys
from six.moves import cPickle as pickle
%matplotlib inline

Read the training data


In [2]:
pickle_file = 'train.pickle'

with open(pickle_file, 'rb') as f:
    save = pickle.load(f)
    train_X_0 = save['data']
    train_outcome = save['outcome']
    del save  # hint to help gc free up memory

In [3]:
# Reformat the labels:
# - pad each number to max_size digits using an 'end_digit' class of 10
# - add a digit-size target (how many digits the number has)
# - encode every target as one-hot

def label_reformat(label, max_size=5):
    digit_size = np.asarray([len(x) for x in label])
    digit_size[digit_size > max_size] = max_size
    digit_size = ((np.arange(max_size) + 1) == digit_size[:, None]).astype(np.float32)

    digits = {}
    end_digit = 10.0
    for i in range(max_size):
        digit_coding = np.asarray([x[i] if len(x) > i else end_digit for x in label])
        digit_coding = (np.arange(end_digit + 1) == digit_coding[:, None]).astype(np.float32)
        digits['digit_' + str(i)] = digit_coding
        
    return digit_size, digits
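
A quick sanity check of the encoding (an unexecuted sketch; toy_label is a made-up example and assumes each label is a sequence of digit integers, which is how label_reformat indexes it):

In [ ]:
toy_label = [[1, 9], [5]]  # hypothetical labels for the numbers "19" and "5"
toy_size, toy_digits = label_reformat(toy_label)
# toy_size is one-hot over lengths 1..5: [[0,1,0,0,0], [1,0,0,0,0]]
# toy_digits['digit_0'] one-hot encodes [1, 5];
# toy_digits['digit_1'] encodes [9, 10], where 10 is the end_digit padding class
print(toy_size)
print(toy_digits['digit_0'])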

Sample a smaller dataset


In [29]:
image_size = train_X_0.shape[1]
num_channels = train_X_0.shape[3]
batch_size = 20
val_size = 40
test_size = 50


train_label = train_outcome['label'][:5000]
train_digit_size, train_digits = label_reformat(train_label)
train_X = train_X_0[:5000]


val_label = train_outcome['label'][6200:6300]
val_digit_size, val_digits = label_reformat(val_label)
val_X = train_X_0[6200:6300]

val_size = val_X.shape[0]

In [30]:
print(train_digit_size.shape)
print(train_digits['digit_0'].shape)
print(train_X.shape)


(5000, 5)
(5000, 11)
(5000, 64, 64, 3)

Start a TensorFlow session


In [31]:
sess = tf.InteractiveSession()

In [32]:
def weight_variable(shape):
    # small random weights to break symmetry
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    # slightly positive bias works well with ReLU units
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv2d(x, W):
    # stride-1 convolution with SAME padding keeps the spatial size
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    # 2x2 max pooling halves the spatial dimensions
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

In [33]:
x_image = tf.placeholder(tf.float32, shape=(batch_size, image_size, image_size, num_channels))

y_d1 = tf.placeholder(tf.float32, shape=(batch_size, 11))
y_d2 = tf.placeholder(tf.float32, shape=(batch_size, 11))
y_d3 = tf.placeholder(tf.float32, shape=(batch_size, 11))
y_d4 = tf.placeholder(tf.float32, shape=(batch_size, 11))
y_d5 = tf.placeholder(tf.float32, shape=(batch_size, 11))

y_dsize = tf.placeholder(tf.float32, shape=(batch_size, 5))

In [34]:
val_x_image = tf.placeholder(tf.float32, shape=(val_size, image_size, image_size, num_channels))

val_y_d1 = tf.placeholder(tf.float32, shape=(val_size, 11))
val_y_d2 = tf.placeholder(tf.float32, shape=(val_size, 11))
val_y_d3 = tf.placeholder(tf.float32, shape=(val_size, 11))
val_y_d4 = tf.placeholder(tf.float32, shape=(val_size, 11))
val_y_d5 = tf.placeholder(tf.float32, shape=(val_size, 11))

val_y_dsize = tf.placeholder(tf.float32, shape=(val_size, 5))

test_x_image = tf.placeholder(tf.float32, shape=(test_size, image_size, image_size, num_channels))

test_y_d1 = tf.placeholder(tf.float32, shape=(test_size, 11))
test_y_d2 = tf.placeholder(tf.float32, shape=(test_size, 11))
test_y_d3 = tf.placeholder(tf.float32, shape=(test_size, 11))
test_y_d4 = tf.placeholder(tf.float32, shape=(test_size, 11))
test_y_d5 = tf.placeholder(tf.float32, shape=(test_size, 11))

test_y_dsize = tf.placeholder(tf.float32, shape=(test_size, 5))

In [35]:
def next_batch(X, y_dsize, y_ds, batch_size=50, replace=True):
    idx = np.random.choice(X.shape[0], batch_size, replace=replace)
    batch_x = X[idx,:,:,:]
    batch_y_dsize = y_dsize[idx,:]
    batch_y_d1 = y_ds['digit_0'][idx,:]
    batch_y_d2 = y_ds['digit_1'][idx,:]
    batch_y_d3 = y_ds['digit_2'][idx,:]
    batch_y_d4 = y_ds['digit_3'][idx,:]
    batch_y_d5 = y_ds['digit_4'][idx,:]
    
    return batch_x, batch_y_dsize, batch_y_d1, batch_y_d2, batch_y_d3, batch_y_d4, batch_y_d5
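
As a quick shape check, one draw from next_batch should line up with the placeholders defined above (unexecuted sketch):

In [ ]:
(bx, b_dsize, b_d1, b_d2,
 b_d3, b_d4, b_d5) = next_batch(train_X, train_digit_size, train_digits, batch_size)
# expected shapes: bx (20, 64, 64, 3), b_dsize (20, 5), each digit batch (20, 11)
print(bx.shape)
print(b_dsize.shape)
print(b_d1.shape)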

Construct CNN


In [36]:
W_conv1 = weight_variable([5, 5, num_channels, 32])
b_conv1 = bias_variable([32])

h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

In [37]:
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])

h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

In [38]:
# two rounds of 2x2 max pooling reduce the 64x64 input to 16x16, with 64 feature maps
W_fc1 = weight_variable([16 * 16 * 64, 1024])
b_fc1 = bias_variable([1024])

h_pool2_flat = tf.reshape(h_pool2, [-1, 16 * 16 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

Dropout layer


In [39]:
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

Fully connected layers with several softmax heads: one for each of the five digits and one for the digit count


In [40]:
#first digit
W_fc2_d1 = weight_variable([1024, 11])
b_fc2_d1 = bias_variable([11])

y_conv_d1 = tf.matmul(h_fc1_drop, W_fc2_d1) + b_fc2_d1

#second digit
W_fc2_d2 = weight_variable([1024, 11])
b_fc2_d2 = bias_variable([11])

y_conv_d2 = tf.matmul(h_fc1_drop, W_fc2_d2) + b_fc2_d2

#third digit
W_fc2_d3 = weight_variable([1024, 11])
b_fc2_d3 = bias_variable([11])

y_conv_d3 = tf.matmul(h_fc1_drop, W_fc2_d3) + b_fc2_d3

#fourth digit
W_fc2_d4 = weight_variable([1024, 11])
b_fc2_d4 = bias_variable([11])

y_conv_d4 = tf.matmul(h_fc1_drop, W_fc2_d4) + b_fc2_d4

#fifth digit
W_fc2_d5 = weight_variable([1024, 11])
b_fc2_d5 = bias_variable([11])

y_conv_d5 = tf.matmul(h_fc1_drop, W_fc2_d5) + b_fc2_d5

#digit size
W_fc2_dsize = weight_variable([1024, 5])
b_fc2_dsize = bias_variable([5])

y_conv_dsize = tf.matmul(h_fc1_drop, W_fc2_dsize) + b_fc2_dsize
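
One way to read the six heads out as a single number at prediction time (an illustrative numpy sketch operating on logits fetched with sess.run; not used in the training below): take the argmax of each digit head, keep as many digits as the size head predicts, and treat class 10 as padding.

In [ ]:
def decode_prediction(dsize_logits, digit_logits_list):
    # dsize_logits: (batch, 5) array; digit_logits_list: five (batch, 11) arrays
    sizes = np.argmax(dsize_logits, axis=1) + 1   # predicted number of digits, 1..5
    digits = np.stack([np.argmax(d, axis=1) for d in digit_logits_list], axis=1)
    numbers = []
    for size, ds in zip(sizes, digits):
        # drop the end_digit class (10) and join the remaining digits into an int
        numbers.append(int(''.join(str(d) for d in ds[:size] if d != 10) or '0'))
    return numbers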

In [41]:
cross_entropy = (tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y_conv_d1, labels=y_d1))
                 + tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y_conv_d2, labels=y_d2))
                 + tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y_conv_d3, labels=y_d3))
                 + tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y_conv_d4, labels=y_d4))
                 + tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y_conv_d5, labels=y_d5))
                 + tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y_conv_dsize, labels=y_dsize)))

train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

In [42]:
# as a quick sanity check, measure accuracy on the first digit only
correct_prediction = tf.equal(tf.argmax(y_conv_d1, 1), tf.argmax(y_d1, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
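
A stricter metric would count an example as correct only when the size head and all five digit heads are right at once; a possible sketch (not used in the run below):

In [ ]:
correct_all = tf.equal(tf.argmax(y_conv_dsize, 1), tf.argmax(y_dsize, 1))
for logits, labels in [(y_conv_d1, y_d1), (y_conv_d2, y_d2), (y_conv_d3, y_d3),
                       (y_conv_d4, y_d4), (y_conv_d5, y_d5)]:
    correct_all = tf.logical_and(correct_all,
                                 tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1)))
sequence_accuracy = tf.reduce_mean(tf.cast(correct_all, tf.float32))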

Train the model on a small dataset and see whether it overfits

If it overfits, good. If not, look for bugs.


In [43]:
sess.run(tf.initialize_all_variables())
for i in range(1000):
    (batch_x, batch_y_dsize, 
     batch_y_d1, batch_y_d2, 
     batch_y_d3, batch_y_d4, batch_y_d5) = next_batch(train_X, 
                                                      train_digit_size, 
                                                      train_digits, batch_size)
        
    train_step.run(feed_dict={
            x_image: batch_x, y_dsize: batch_y_dsize,
            y_d1: batch_y_d1, y_d2: batch_y_d2, y_d3: batch_y_d3,
            y_d4: batch_y_d4, y_d5: batch_y_d5,
            keep_prob: 0.5})
    
    if i % 10 == 0:
        train_accuracy = accuracy.eval(feed_dict={
                x_image: batch_x, y_dsize: batch_y_dsize,
                y_d1: batch_y_d1, y_d2: batch_y_d2, y_d3: batch_y_d3,
                y_d4: batch_y_d4, y_d5: batch_y_d5,
                keep_prob: 1.0})
        print("step %d, training accuracy %g"%(i, train_accuracy))
        
    if i % 40 == 0:
        (batch_x, batch_y_dsize, 
         batch_y_d1, batch_y_d2, 
         batch_y_d3, batch_y_d4, batch_y_d5) = next_batch(val_X, 
                                                          val_digit_size, 
                                                          val_digits, batch_size)
        val_accuracy = accuracy.eval(feed_dict={
                    x_image: batch_x, y_dsize: batch_y_dsize,
                    y_d1: batch_y_d1, y_d2: batch_y_d2, y_d3: batch_y_d3,
                    y_d4: batch_y_d4, y_d5: batch_y_d5,
                    keep_prob: 1.0})
        print("step %d, val accuracy %g"%(i, val_accuracy))


step 0, training accuracy 0.1
step 0, val accuracy 0.05
step 10, training accuracy 0.15
step 20, training accuracy 0.25
step 30, training accuracy 0.2
step 40, training accuracy 0.2
step 40, val accuracy 0.3
step 50, training accuracy 0.35
step 60, training accuracy 0.1
step 70, training accuracy 0.35
step 80, training accuracy 0.2
step 80, val accuracy 0.35
step 90, training accuracy 0.25
step 100, training accuracy 0.05
step 110, training accuracy 0.2
step 120, training accuracy 0.1
step 120, val accuracy 0.3
step 130, training accuracy 0.1
step 140, training accuracy 0.2
step 150, training accuracy 0.05
step 160, training accuracy 0.2
step 160, val accuracy 0.25
step 170, training accuracy 0.15
step 180, training accuracy 0.3
step 190, training accuracy 0.25
step 200, training accuracy 0.2
step 200, val accuracy 0.2
step 210, training accuracy 0.2
step 220, training accuracy 0.2
step 230, training accuracy 0.3
step 240, training accuracy 0.25
step 240, val accuracy 0.3
step 250, training accuracy 0.25
step 260, training accuracy 0.2
step 270, training accuracy 0.15
step 280, training accuracy 0.1
step 280, val accuracy 0.35
step 290, training accuracy 0.2
step 300, training accuracy 0.25
step 310, training accuracy 0.3
step 320, training accuracy 0.3
step 320, val accuracy 0.2
step 330, training accuracy 0.2
step 340, training accuracy 0.2
step 350, training accuracy 0.15
step 360, training accuracy 0.35
step 360, val accuracy 0.15
step 370, training accuracy 0.3
step 380, training accuracy 0.3
step 390, training accuracy 0.45
step 400, training accuracy 0.35
step 400, val accuracy 0.3
step 410, training accuracy 0.3
step 420, training accuracy 0.25
step 430, training accuracy 0.3
step 440, training accuracy 0.4
step 440, val accuracy 0.15
step 450, training accuracy 0.4
step 460, training accuracy 0.3
step 470, training accuracy 0.3
step 480, training accuracy 0.25
step 480, val accuracy 0.35
step 490, training accuracy 0.15
step 500, training accuracy 0.25
step 510, training accuracy 0.3
step 520, training accuracy 0.45
step 520, val accuracy 0.35
step 530, training accuracy 0.2
step 540, training accuracy 0.3
step 550, training accuracy 0.4
step 560, training accuracy 0.2
step 560, val accuracy 0.05
step 570, training accuracy 0.2
step 580, training accuracy 0.2
step 590, training accuracy 0.2
step 600, training accuracy 0.25
step 600, val accuracy 0.35
step 610, training accuracy 0.55
step 620, training accuracy 0.35
step 630, training accuracy 0.15
step 640, training accuracy 0.3
step 640, val accuracy 0.3
step 650, training accuracy 0.25
step 660, training accuracy 0.5
step 670, training accuracy 0.15
step 680, training accuracy 0.25
step 680, val accuracy 0.65
step 690, training accuracy 0.15
step 700, training accuracy 0.2
step 710, training accuracy 0.4
step 720, training accuracy 0.25
step 720, val accuracy 0.2
step 730, training accuracy 0.25
step 740, training accuracy 0.15
step 750, training accuracy 0.2
step 760, training accuracy 0.4
step 760, val accuracy 0.25
step 770, training accuracy 0.15
step 780, training accuracy 0.2
step 790, training accuracy 0.3
step 800, training accuracy 0.2
step 800, val accuracy 0.3
step 810, training accuracy 0.3
step 820, training accuracy 0.35
step 830, training accuracy 0.35
step 840, training accuracy 0.25
step 840, val accuracy 0.35
step 850, training accuracy 0.2
step 860, training accuracy 0.2
step 870, training accuracy 0.25
step 880, training accuracy 0.4
step 880, val accuracy 0.35
step 890, training accuracy 0.35
step 900, training accuracy 0.45
step 910, training accuracy 0.2
step 920, training accuracy 0.4
step 920, val accuracy 0.25
step 930, training accuracy 0.3
step 940, training accuracy 0.4
step 950, training accuracy 0.3
step 960, training accuracy 0.25
step 960, val accuracy 0.4
step 970, training accuracy 0.25
step 980, training accuracy 0.15
step 990, training accuracy 0.4

In [ ]: